library(tidyverse)
library(patchwork)
library(ggrastr)

# Load the URL-level and domain-level reference tables (tab-separated files).
urls_df <- read_tsv(file = "data/raw/url_reference_table.tsv")
domain_df <- read_tsv(file = "data/raw/domain_reference_table.tsv")


# Agreement between the modeled score and the party-registration score,
# summarised as a Pearson correlation (cor() default) and as the slope of a
# simple linear regression of the modeled score on the registration score.
# URL-level statistics use only rows with politics_label == 1.
political_urls <- filter(urls_df, politics_label == 1)

url_cor <- cor(
    political_urls$url_score_continuous,
    political_urls$url_score_reg,
    use = "pairwise.complete.obs"
)
domain_cor <- cor(
    domain_df$domain_score_continuous_political,
    domain_df$domain_score_reg_political,
    use = "pairwise.complete.obs"
)

# Pick the slope by coefficient name rather than by position.
url_beta <- coef(
    lm(url_score_continuous ~ url_score_reg, data = political_urls)
)["url_score_reg"]
domain_beta <- coef(
    lm(domain_score_continuous_political ~ domain_score_reg_political,
       data = domain_df)
)["domain_score_reg_political"]

# URL panel: modeled score (x) against party-registration score (y) for URLs
# with politics_label == 1.
# NOTE: despite its name, `spearman` holds the rounded regression slope
# (url_beta), printed as beta in the title; `pearson` is the rounded correlation.
pearson <- round(url_cor, 2)
spearman <- round(url_beta, 2)

url_scatter <- urls_df |>
    filter(politics_label == 1) |>
    ggplot(aes(x = url_score_continuous, y = url_score_reg)) +
    # Only the point layer is rasterized.
    rasterize(geom_point(alpha = 0.01, color = "black"), dpi = 300) +
    # No slope/intercept supplied, so this is ggplot2's default y = x line.
    geom_abline(linetype = "dashed", alpha = 0.9, color = "black") +
    coord_cartesian(xlim = c(-1, 1), ylim = c(-1, 1)) +
    labs(
        title = bquote(
            "URL (r =" ~ .(pearson) ~ "," ~ italic(beta) ~ "=" ~ .(spearman) ~ ")"
        ),
        x = "URL score (modeled partisanship)",
        y = "URL score (party registration)"
    ) +
    theme_bw()
# Domain panel: modeled score (x) against party-registration score (y), one
# point per row of domain_df.
# NOTE: despite its name, `spearman` holds the rounded regression slope
# (domain_beta), printed as beta in the title; `pearson` is the rounded
# correlation.
pearson <- round(domain_cor, 2)
spearman <- round(domain_beta, 2)

domain_scatter <- domain_df |>
    ggplot(aes(
        x = domain_score_continuous_political,
        y = domain_score_reg_political
    )) +
    # Only the point layer is rasterized.
    rasterize(
        geom_point(alpha = 0.2, color = "black", size = 1),
        dpi = 300
    ) +
    # No slope/intercept supplied, so this is ggplot2's default y = x line.
    geom_abline(linetype = "dashed", alpha = 0.9, color = "black") +
    coord_cartesian(xlim = c(-1, 1), ylim = c(-1, 1)) +
    labs(
        title = bquote(
            "Domain (r =" ~ .(pearson) ~ "," ~ italic(beta) ~ "=" ~ .(spearman) ~ ")"
        ),
        x = "Domain score (modeled partisanship)",
        y = "Domain score (party registration)"
    ) +
    theme_bw()


# Combine the URL and domain panels with patchwork's `+` and write the figure
# twice: once under its figure-number name and once under a descriptive name.
score_v_reg <- url_scatter + domain_scatter

# Arguments are spelled out in full: the previous `file =` only worked through
# partial matching to `filename`, with the plot passed positionally.
ggsave(
    filename = "results/fig_g2.pdf",
    plot = score_v_reg,
    width = 8 * 1.5,
    height = 5 * 1.5,
    dpi = 300
)
ggsave(
    filename = "results/score_v_reg.pdf",
    plot = score_v_reg,
    width = 8 * 1.5,
    height = 5 * 1.5,
    dpi = 300
)


# Same correlation and regression-slope summaries as for the main figure, but
# against the `_ind` variants of the party-registration scores.
# URL-level statistics use only rows with politics_label == 1.
political_urls <- filter(urls_df, politics_label == 1)

url_cor_ind <- cor(
    political_urls$url_score_continuous,
    political_urls$url_score_reg_ind,
    use = "pairwise.complete.obs"
)
domain_cor_ind <- cor(
    domain_df$domain_score_continuous_political,
    domain_df$domain_score_reg_ind_political,
    use = "pairwise.complete.obs"
)

# Pick the slope by coefficient name rather than by position.
url_beta_ind <- coef(
    lm(url_score_continuous ~ url_score_reg_ind, data = political_urls)
)["url_score_reg_ind"]
domain_beta_ind <- coef(
    lm(domain_score_continuous_political ~ domain_score_reg_ind_political,
       data = domain_df)
)["domain_score_reg_ind_political"]


# URL panel for the `_ind` registration score: modeled score (x) against
# url_score_reg_ind (y) for URLs with politics_label == 1.
#
# Fix: the title statistics now come from the URL-level values
# (url_cor_ind / url_beta_ind). They were previously taken from the
# domain-level values, so this panel displayed the domain panel's r and beta.
#
# NOTE: despite its name, `spearman` holds the rounded regression slope,
# printed as beta in the title; `pearson` is the rounded correlation.
# NOTE(review): the y-axis label is identical to the non-`_ind` figure; confirm
# whether it should mention the `_ind` variant.
pearson <- round(url_cor_ind, 2)
spearman <- round(url_beta_ind, 2)

url_scatter_ind <- urls_df |>
    filter(politics_label == 1) |>
    ggplot(aes(x = url_score_continuous, y = url_score_reg_ind)) +
    # Only the point layer is rasterized.
    rasterize(geom_point(alpha = 0.01, color = "black"), dpi = 300) +
    # No slope/intercept supplied, so this is ggplot2's default y = x line.
    geom_abline(linetype = "dashed", alpha = 0.9, color = "black") +
    theme_bw() +
    labs(
        x = "URL score (modeled partisanship)",
        y = "URL score (party registration)",
        title = bquote(
            "URL (r =" ~ .(pearson) ~ "," ~ italic(beta) ~ "=" ~ .(spearman) ~ ")"
        )
    ) +
    coord_cartesian(xlim = c(-1, 1), ylim = c(-1, 1))

# Domain panel for the `_ind` registration score: modeled score (x) against
# domain_score_reg_ind_political (y), one point per row of domain_df.
# NOTE: despite its name, `spearman` holds the rounded regression slope
# (domain_beta_ind), printed as beta in the title; `pearson` is the rounded
# correlation.
pearson <- round(domain_cor_ind, 2)
spearman <- round(domain_beta_ind, 2)

domain_scatter_ind <- domain_df |>
    ggplot(aes(
        x = domain_score_continuous_political,
        y = domain_score_reg_ind_political
    )) +
    # Only the point layer is rasterized.
    rasterize(
        geom_point(alpha = 0.2, color = "black", size = 1),
        dpi = 300
    ) +
    # No slope/intercept supplied, so this is ggplot2's default y = x line.
    geom_abline(linetype = "dashed", alpha = 0.9, color = "black") +
    coord_cartesian(xlim = c(-1, 1), ylim = c(-1, 1)) +
    labs(
        title = bquote(
            "Domain (r =" ~ .(pearson) ~ "," ~ italic(beta) ~ "=" ~ .(spearman) ~ ")"
        ),
        x = "Domain score (modeled partisanship)",
        y = "Domain score (party registration)"
    ) +
    theme_bw()

# Combine the `_ind` URL and domain panels with patchwork's `+` and write the
# figure twice: once under its figure-number name and once under a
# descriptive name.
score_v_reg_ind <- url_scatter_ind + domain_scatter_ind

# Arguments are spelled out in full: the previous `file =` only worked through
# partial matching to `filename`, with the plot passed positionally.
ggsave(
    filename = "results/fig_g3.pdf",
    plot = score_v_reg_ind,
    width = 8 * 1.5,
    height = 5 * 1.5,
    dpi = 300
)
ggsave(
    filename = "results/score_v_reg_ind.pdf",
    plot = score_v_reg_ind,
    width = 8 * 1.5,
    height = 5 * 1.5,
    dpi = 300
)


# Read one voter-file extract, keeping only the two columns used downstream.
#
# x: file path handed to read_tsv(), so the file is parsed as tab-separated.
# Returns a tibble with tsmart_census_id (character) and
# tsmart_partisan_score (parsed with col_number()); rows where either value
# is missing are dropped.
read_vf <- function(x) {
    wanted_cols <- cols_only(
        tsmart_census_id = col_character(),
        tsmart_partisan_score = col_number()
    )
    voter_rows <- read_tsv(x, col_types = wanted_cols)
    filter(
        voter_rows,
        !(is.na(tsmart_census_id) | is.na(tsmart_partisan_score))
    )
}

# Read every matching voter-file extract with read_vf() and stack the results
# row-wise into a single table.
# NOTE(review): in the pattern the "." before "csv" is an unescaped regex
# wildcard and the pattern is not anchored with "$"; also, the files are
# matched on "csv" but read_vf() parses them as tab-separated. Confirm both
# are intended.
vf_paths <- list.files(
    path = "data/voter_data/full/",
    pattern = "tsmart_northeastern\\w+.csv",
    full.names = TRUE
)
df <- map_dfr(vf_paths, read_vf)

# Per-county summary of the voter file: row count and mean partisan score.
# The county key is the first five characters of tsmart_census_id
# (presumably the state + county FIPS prefix -- confirm against the data).
agg <- df |>
    group_by(county_fips = str_sub(tsmart_census_id, 1, 5)) |>
    summarise(
        n = n(),
        score = mean(tsmart_partisan_score),
        .groups = "drop"
    )


# County-level 2016 presidential results, one row per county, with the
# Clinton share of the two-candidate (Clinton + Trump) vote and that
# two-candidate total.
results <- read_tsv("data/countypres_2000-2020.tab") |>
    mutate(
        # Left-pad to five characters so the key matches the 5-character
        # county_fips built from the voter file.
        county_fips = str_pad(county_fips, width = 5, side = "left", pad = "0"),
        label = glue::glue("{str_to_title(county_name)} County, {state_po}")
    ) |>
    filter(!is.na(county_fips)) |>
    filter(year == 2016, office == "PRESIDENT") |>
    filter(candidate %in% c("HILLARY CLINTON", "DONALD TRUMP")) |>
    select(county_fips, label, candidate, candidatevotes) |>
    # One column of vote counts per candidate.
    pivot_wider(names_from = candidate, values_from = candidatevotes) |>
    mutate(
        twoway = `HILLARY CLINTON` / (`HILLARY CLINTON` + `DONALD TRUMP`),
        total = `HILLARY CLINTON` + `DONALD TRUMP`
    ) |>
    filter(!str_detect(county_fips, "000$")) # exclude invalid counties

# Keep counties present in both the voter-file summary and the election
# results. The join key is stated explicitly instead of being inferred from
# shared column names (county_fips is the only column the two tables share).
plot_df <- inner_join(agg, results, by = "county_fips") |>
    mutate(
        # `label` ends with the two-letter state_po code.
        state = str_sub(label, -2),
        # Flag Kentucky and West Virginia counties for separate colouring.
        highlight = state %in% c("KY", "WV")
    )

# County-level scatter: mean modeled partisanship (x) against the two-way
# Clinton vote share (y). Point size follows the two-candidate vote total and
# colour marks the highlighted (KY / WV) counties.
county_plot <- ggplot(
    plot_df,
    aes(x = score, y = twoway, size = total, color = highlight)
) +
    geom_point(alpha = 0.6) +
    # Reference line y = x / 100: the x axis runs 0-100, the y axis 0-1.
    geom_abline(intercept = 0, slope = 0.01, alpha = 0.8, linetype = "dashed") +
    coord_cartesian(xlim = c(0, 100), ylim = c(0, 1)) +
    # Colours are named by logical level so the mapping does not depend on
    # which levels happen to be present or on their order.
    scale_color_manual(
        values = c("FALSE" = "gray30", "TRUE" = "firebrick"),
        guide = "none"
    ) +
    theme_bw() +
    scale_size_continuous(range = c(0.2, 5), guide = "none") +
    scale_y_continuous(labels = scales::percent) +
    # The dangling trailing comma (an empty argument) has been removed.
    labs(
        x = "Average modeled partisanship (2017)",
        y = "Two-way Hillary Clinton vote share (2016)"
    )

# Write the county figure twice: once under its figure-number name and once
# under a descriptive name. Arguments are spelled out in full: the previous
# `file =` only worked through partial matching to `filename`, with the plot
# passed positionally.
ggsave(
    filename = "results/fig_g1.pdf",
    plot = county_plot,
    width = 8,
    height = 6,
    dpi = 300
)
ggsave(
    filename = "results/county_plot.pdf",
    plot = county_plot,
    width = 8,
    height = 6,
    dpi = 300
)
